import requests
from bs4 import BeautifulSoup
from time import sleep

def getHtmlPage():
    """Download the Sohu NBA player ranking page and pass it to parseHtml().

    The page is requested once with a desktop-browser User-Agent, decoded as
    UTF-8 and handed to parseHtml(). Nothing is returned.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
    """
    # requests needs a header *mapping*: header name and value are separate
    # key/value entries, not a single "Name: value" string inside a set.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763',
    }
    # The URL contains no format placeholder, so there is exactly one page to
    # fetch; looping over page numbers would only re-download the same page.
    url = 'http://data.sports.sohu.com/nba/nba_players_rank.php?order_by=points&spm=smpc.fb-nba-home.top-dc.2.1620824904040nf6byr7'

    # A timeout keeps the script from hanging forever on a stalled connection.
    resp = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly instead of parsing and saving an HTTP error page.
    resp.raise_for_status()
    resp.encoding = 'utf-8'
    parseHtml(resp.text)

def parseHtml(pageText):
    """Extract the title and list items from a page and save them to a file.

    The markup is parsed with BeautifulSoup using the lxml parser. The
    <title> text, followed by the text of every element matched by the
    'blue > ul > li' selector, is written one entry per line to a UTF-8
    file named after the title ('<title>.txt'), and a completion message is
    printed.

    Args:
        pageText: HTML markup of the page. None or an empty value is ignored
            and nothing is written.
    """
    if not pageText:
        # Nothing to parse (e.g. the caller had no page to hand over).
        return

    sleep(1)  # one-second pause per parsed page, kept from the original flow
    soup = BeautifulSoup(pageText, 'lxml')

    # Pages without a <title> would otherwise raise AttributeError.
    titleTag = soup.title
    title = titleTag.text if titleTag is not None else 'untitled'

    contentList = [title]
    # NOTE(review): 'blue' selects a <blue> *tag*; if the target is an element
    # with class or id "blue" this should be '.blue' / '#blue' -- confirm
    # against the page markup.
    contentList.extend(item.text for item in soup.select('blue > ul > li'))

    # Replace characters that are invalid or unsafe in file names (path
    # separators, Windows-reserved characters, line breaks and tabs).
    unsafeChars = '\\/:*?"<>|\r\n\t'
    fileName = title.translate(
        str.maketrans(unsafeChars, '_' * len(unsafeChars))
    ).strip() or 'untitled'

    # 'with' closes the file even if writing fails; writelines() adds no
    # separators itself, so terminate every entry with a newline.
    with open(fileName + '.txt', 'w', encoding='utf-8') as fo:
        fo.writelines(entry + '\n' for entry in contentList)
    print(title+'页面爬取完毕')
    

if __name__ == '__main__':
    # getHtmlPage() downloads the page and passes it to parseHtml() itself;
    # it has no return statement, so its result is None. Only parse here if a
    # page was actually returned, instead of unconditionally calling
    # parseHtml(None) and failing after the work is already done.
    pageText = getHtmlPage()
    if pageText is not None:
        parseHtml(pageText)














    
